# Clear the workspace: remove every object that ls() lists in the environment
# the script runs in (names starting with '.' are not listed, so they stay).
# Loaded packages are not affected.
rm(list=ls())

library(openxlsx)
library(ggplot2)
library(ggthemes)
library(reshape2)
library(readr)
library(tokenizers)
library(dplyr)
library(countrycode)
library(survival)
library(zoo)
library(ggthemes)
library(tidyr)
library(sp)
library(plyr)
library(estprod)
library(stringr)


# Translate country identifiers from one coding scheme to another.
#
# Builds an old -> new lookup from the rows of `countryref` whose `type` is
# 'country' and applies it with dplyr::recode().
#
# Args:
#   vec:      vector of identifiers expressed in `scheme_1`.
#   scheme_1: name of the `countryref` column holding the current codes.
#   scheme_2: name of the `countryref` column holding the target codes.
#
# Returns:
#   `vec` with every value found in the `scheme_1` column replaced by the
#   `scheme_2` value from the same `countryref` row. Values without a match
#   follow dplyr::recode()'s default handling.
#
# NOTE(review): `countryref` is not defined inside this function; it has to be
# visible from the environment the function is defined in -- confirm where it
# is loaded.
recode_countries <- function(vec, scheme_1, scheme_2){
  is_country <- which(countryref$type == 'country')

  # Read both columns from the same rows so each old code stays paired with
  # its own new code. De-duplicating the two columns independently shifts the
  # pairing whenever one column repeats a value (or is NA) and the other
  # does not.
  old_codes <- as.character(countryref[[scheme_1]][is_country])
  new_codes <- as.character(countryref[[scheme_2]][is_country])

  # recode() takes old = new pairs and each old code may be named only once:
  # drop rows without an old code and keep the first row per old code.
  keep <- !is.na(old_codes) & !duplicated(old_codes)
  recode_vals <- new_codes[keep]
  names(recode_vals) <- old_codes[keep]

  # recode() errors when it receives no replacements; nothing to translate.
  if (length(recode_vals) == 0) {
    return(vec)
  }

  # Qualified call: plyr is attached after dplyr in this script, so dplyr
  # verbs are referenced explicitly (as done for dplyr::summarise).
  dplyr::recode(vec, !!!recode_vals)
}

# Input data: the keyword-relevance predictions and the full set of UNGA
# plenary tiles. Each row is counted as one tile below.
borders_unga_speeches <- read.csv('kw_relevance_predicted_set.csv', stringsAsFactors = FALSE)
full_unga_speeches <- read.csv('UNGAplenary_tiles_v5.csv', stringsAsFactors = FALSE)

# Keep only the rows predicted as relevant (Predicted == 1) and drop columns
# whose names start with 'x' (starts_with() ignores case by default).
borders_unga_speeches <- borders_unga_speeches %>%
  select(-starts_with('x')) %>%
  filter(Predicted == 1)

# Word count of each tile's text, computed once per data set.
full_unga_speeches$n_words <- count_words(full_unga_speeches$text)
borders_unga_speeches$n_words <- count_words(borders_unga_speeches$text)

# Mention DV
# Country-year counts of tiles and words, overall and for the relevant subset.
# Each step right_joins onto the table built so far, so every country-year in
# the overall data is kept; border counts are NA where there is no relevant
# tile and are filled with 0 further down.
border_tiles <- full_unga_speeches %>%
  group_by(country, year) %>%
  dplyr::summarise(tiles_overall = n())
border_tiles <- borders_unga_speeches %>%
  group_by(country, year) %>%
  dplyr::summarise(tiles_borders = n()) %>%
  right_join(border_tiles, by = c('country', 'year'))

border_tiles <- full_unga_speeches %>%
  group_by(country, year) %>%
  dplyr::summarise(words_overall = sum(n_words)) %>%
  right_join(border_tiles, by = c('country', 'year'))
border_tiles <- borders_unga_speeches %>%
  group_by(country, year) %>%
  dplyr::summarise(words_borders = sum(n_words)) %>%
  right_join(border_tiles, by = c('country', 'year'))

# Country-years without relevant tiles: zero border tiles and zero border words.
border_tiles <- replace_na(border_tiles, list(tiles_borders = 0, words_borders = 0))

# Share of tiles / words that are border-relevant within each country-year.
border_tiles$prop_tiles <- border_tiles$tiles_borders/border_tiles$tiles_overall
border_tiles$prop_words <- border_tiles$words_borders/border_tiles$words_overall

# drop EC, EU
border_tiles <- border_tiles %>% filter(!country %in% c('EC', 'EU'))

# Sentiment DV
# NOTE(review): assumes sentiment_tiles.RData provides an object named `tiles`
# with country, year and predicted_sentiment columns -- confirm.
# NOTE(review): if `tiles` holds more than one row per country-year, this join
# repeats the matching border_tiles rows -- confirm that is intended.
load('sentiment_tiles.RData')
sentiment <- tiles
border_tiles <- sentiment %>%
  select(country, year, predicted_sentiment) %>%
  right_join(border_tiles, by = c('country', 'year'))

# Table E1
# Countries ordered by mean predicted sentiment, lowest first.
# dplyr::arrange is qualified because plyr (attached after dplyr) masks arrange().
border_tiles %>%
  group_by(country) %>%
  dplyr::summarise(mean_sentiment = mean(predicted_sentiment, na.rm=TRUE)) %>%
  dplyr::arrange(mean_sentiment)
